logo of company

Recipes for modifying some analyses


Where we present some pieces of code to modify the bioinformatic pipeline

Author: Adrien Taudière

Date: December 2, 2024

See also Easy16S for inspiration in bash.

Change the parameters used to merge forward and reverse sequences

  1. Play with the options minOverlap (default 12) and maxMismatch (default 0). For example, replace the merged_seq target with the following code to allow 1 mismatch and a minimum overlap of 8.
# Merge the denoised forward and reverse reads, allowing 1 mismatch and a
# minimum overlap of 8 (instead of the mergePairs() defaults).
tar_target(
  merged_seq,
  mergePairs(
    dadaF = ddF,
    dadaR = ddR,
    derepF = derep_fs,
    derepR = derep_rs,
    minOverlap = 8,
    maxMismatch = 1
  ),
  # `format` is an argument of tar_target(), not of mergePairs().
  format = "qs"
)

Forward only pipeline

  1. Replace all the “paired end” area with the following code
  ##> Remove primers (forward primer only)
  tar_target(
    name = cutadapt,
    command = cutadapt_remove_primers(
      path_to_fastq = here("data/data_raw/rawseq/"),
      primer_fw = fw_primer_sequences,
      folder_output = here("data/data_intermediate/seq_wo_primers/"),
      # Shell prefix: sources conda and activates the `cutadaptenv`
      # environment; adapt it to your own installation.
      args_before_cutadapt = "source ~/miniforge3/etc/profile.d/conda.sh && conda activate cutadaptenv && "
    )
  ),
  tar_target(
    name = data_raw,
    command = {
      # The bare `cutadapt` symbol is only here so that this target is
      # registered as depending on the primer-removal target.
      cutadapt
      list_fastq_files(
        path = here::here("data/data_intermediate/seq_wo_primers/"),
        paired_end = FALSE
      )
    }
  ),

  ##> Classical dada2 pipeline (forward reads only)
  tar_target(data_fnfs, data_raw$fnfs),
  ### Pre-filter the forward reads with low stringency
  tar_target(
    filtered,
    filter_trim(
      # Forward-only run: only `fw` is supplied. No `data_fnrs` target is
      # defined in this recipe, so `rev` must not be passed.
      fw = data_fnfs,
      # here() already resolves the path from the project root, so it must
      # not be concatenated with getwd().
      output_fw = here("data/data_intermediate/filterAndTrim_fwd"),
      multithread = n_threads,
      compress = TRUE
    )
  ),

  ### Dereplicate fastq files
  # NOTE(review): assumes the first element of `filtered` holds the filtered
  # forward files -- confirm against the return value of filter_trim().
  tar_target(derep_fs, derepFastq(filtered[[1]]), format = "qs"),
  ### Learn the error rates
  tar_target(err_fs, learnErrors(derep_fs, multithread = 4), format = "qs"),
  ### Make amplicon sequence variants
  tar_target(ddF, dada(derep_fs, err_fs, multithread = 4), format = "qs"),
  ### Build a table of ASV x Samples (from the forward reads only, so there
  ### are no reverse-read targets and no merging step)
  tar_target(seq_tab, makeSequenceTable(ddF)),

Add a second taxonomic assignment using a different database or algorithm

  1. Add a new database file (fasta) in data/data_raw/refseq
  2. Copy the two targets below and fill in the appropriate names
  3. Rename the targets by using the new name (e.g. data_phyloseq_newDB) instead of data_phyloseq in the subsequent targets
[...]
# File target pointing at the additional reference database.
# "XXX" is a placeholder: replace it with the name of your fasta file.
tar_target(file_refseq_taxo2, "data/data_raw/refseq/XXX", format = "file")

[...]

# New phyloseq target built from `data_phyloseq` with a second taxonomy
# computed from the `file_refseq_taxo2` database; "PR2" is passed as suffix.
tar_target(
  name = data_phyloseq_newDB,
  command = add_new_taxonomy_pq(
    data_phyloseq,
    file_refseq_taxo2,
    suffix = "PR2",
    # Taxonomic ranks, from the broadest to the most specific.
    taxLevels = c(
      "Kingdom", "Supergroup", "Division", "Subdivision", "Class",
      "Order", "Family", "Genus", "Species"
    )
  )
)

Filter by % sequence (with blast)

# Filter the taxa of `data_phyloseq` with blast against the reference
# database stored in `file_refseq_taxo`.
tar_target(
  name = d_blast,
  command = filter_taxa_blast(
    data_phyloseq,
    # Path to the reference fasta, built from the project root.
    fasta_for_db = paste0(here::here(), "/", file_refseq_taxo),
    nproc = 4
  )
)

Add FUNGuild information for Fungi

# Target holding the result of add_funguild_info() applied to `data_phyloseq`.
tar_target(
  name = d_funguild,
  command = MiscMetabar::add_funguild_info(data_phyloseq)
)

Add Protax information for Bacteria